import json
import re
import xlwt

# Load the source JSON document. The file is decoded explicitly as UTF-8 so
# that its non-ASCII (Chinese) text is read the same way on every platform
# instead of depending on the locale's default encoding.
with open('zhuyuan.json', encoding='utf-8') as f:
    data = json.load(f)

# Fragments that are blanked out (each occurrence becomes one space) before
# the text is split. They are applied in exactly this order.
# NOTE(review): '@nbsp;' is matched literally as written; confirm the data
# really contains '@nbsp;' rather than the HTML entity '&nbsp;'.
_noise_markers = (
    '@nbsp;',
    '中医诊断:',
    '中医诊断：',
    '西医诊断:',
    '西医诊断：',
    '西医：',
    '西医:',
    '中医：',
    '中医:',
)


def _scrub(text):
    """Return *text* with label markers blanked and code tokens removed.

    Steps, in order: replace every marker in ``_noise_markers`` with a
    space, trim surrounding whitespace, then delete tokens shaped like
    ``X00.000-`` followed by tokens shaped like ``X00.000`` (one uppercase
    ASCII letter, two digits, a dot, three digits; presumably diagnosis
    codes, to be confirmed against the data). The two deletions are kept as
    separate passes on purpose.
    """
    for marker in _noise_markers:
        text = text.replace(marker, ' ')
    text = text.strip()
    text = re.sub(r'[A-Z]\d\d\.\d\d\d-', '', text)
    return re.sub(r'[A-Z]\d\d\.\d\d\d', '', text)


# One (original key, [cleaned text]) pair per key of the loaded mapping. The
# untouched key is kept next to the working list of fragments.
l = [(key, [_scrub(key)]) for key in data.keys()]

# Separator patterns used to break a text into individual items. They are
# applied one after another, in this order, by the splitting loop below.
# The two-digit numbering patterns come before the one-digit ones so that a
# marker such as "12." is consumed whole instead of leaving its first digit
# behind.
#
# Raw strings are used so that `\d` and `\.` reach the regex engine as
# written without relying on Python tolerating unknown string escapes
# (a SyntaxWarning on recent Python versions). The pattern values themselves
# are unchanged.
regex_list = [
    r'\d\d\.+',  # two digits followed by one or more dots
    r'\d\d、',    # two digits followed by an ideographic comma
    r'\d\.+',    # one digit followed by one or more dots
    r'\d、',      # one digit followed by an ideographic comma
    '；',         # full-width semicolon
    '。',         # ideographic full stop
    # '，'        # full-width comma: currently not used as a separator
]

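# Earlier variant of regex_list, kept for reference. It spells out the
# `\d\.+` cases one by one and additionally splits on whitespace.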
# regex_list = ['\d\.\.\.',
#               '\d\.\.',
#               '\d\.',
#               '\d、',
#               '；',
#               '。', '\s+']

# Apply each separator pattern in turn. Every pass re-splits all fragments
# produced by the previous pass, trims them, and drops the blank ones.
for regex_pattern in regex_list:
    split_rows = []
    for key, fragments in l:
        kept = []
        for fragment in fragments:
            for piece in re.split(regex_pattern, fragment):
                # Trim whitespace first, then peel punctuation off both ends
                # one character kind at a time. The order matters because
                # each strip only sees what the previous one left behind.
                piece = piece.strip()
                for mark in ('？', ':', '、', '：', '。', '.'):
                    piece = piece.strip(mark)
                # Emptiness is judged on a whitespace-stripped copy; the
                # fragment itself is stored as it is.
                if piece.strip():
                    kept.append(piece)
        split_rows.append((key, kept))
    l = split_rows
    # print(l[0])

# print(l[0])
# print(l[0][0], l[0][1])

# for i in range(22):
#     if i == 21:
#         with open('/Users/rottengeek/result/' + '22' + '.txt', encoding='utf-8', mode='w') as f:
#             for j in range(21 * 1000, 21 * 1000 + 670):
#                 f.write(l[j][0] + '\n')
#         with open('/Users/rottengeek/result/' + '22' + '_.txt', encoding='utf-8', mode='w') as f2:
#             for j in range(21 * 1000, 21 * 1000 + 670):
#                 f2.write(' '.join(l[j][-1]) + '\n')
#         break
#     with open('/Users/rottengeek/result/' + str(i + 1) + '.txt', encoding='utf-8', mode='w') as f:
#         for j in range(i * 1000, 1000 * (i + 1)):
#             f.write(l[j][0] + '\n')
#     with open('/Users/rottengeek/result/' + str(i + 1) + '_.txt', encoding='utf-8', mode='w') as f2:
#         for j in range(i * 1000, 1000 * (i + 1)):
#             f2.write(' '.join(l[j][-1]) + '\n')

# text = '*'.join([l[i][0]] + l[i][-1])
# f.write(text + '\n')

# Write the result to an Excel workbook: one row per record, with the
# original key in column 0 and the split fragments in columns 1, 2, ...

book = xlwt.Workbook()
sheet1 = book.add_sheet('sheet1', cell_overwrite_ok=True)
for row_no, record in enumerate(l):
    source_text = record[0]
    fragments = record[-1]
    sheet1.write(row_no, 0, source_text)
    for col_no, fragment in enumerate(fragments, start=1):
        sheet1.write(row_no, col_no, fragment)
book.save('/Users/rottengeek/zhuyuan.xls')
